# -*- coding: utf-8 -*-
import re

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from ..items import ScrapyDemoItem


class JianshuSpider(CrawlSpider):
    """Crawl jianshu.com and emit one item per article page.

    The crawl starts at the site home page. Every extracted link whose URL
    matches the article pattern (``p/`` followed by 12 lowercase
    alphanumeric characters) is passed to :meth:`parse_item` and, because
    the rule sets ``follow=True``, is also searched for further links.
    """

    name = 'jianshu'
    allowed_domains = ['jianshu.com']
    start_urls = ['https://www.jianshu.com/']

    # Captures the 12-character article id that follows ``/p/`` in a URL.
    _ARTICLE_ID_RE = re.compile(r'/p/([0-9a-z]{12})')

    rules = (
        Rule(LinkExtractor(allow=r'.*p/[0-9a-z]{12}.*'),
             callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        """Build a ``ScrapyDemoItem`` from a single article response.

        Args:
            response: The response for a URL accepted by ``rules``.

        Returns:
            A ``ScrapyDemoItem`` populated with ``author``, ``pub_time``,
            ``url`` and ``url_id``. ``author`` and ``pub_time`` are ``None``
            when their XPath expression matches nothing on the page.
        """
        author = response.xpath(
            '//div[@class="author"]/div[@class="info"]/div[@class="name"]/a/text()').get()
        # NOTE(review): this absolute, position-based path depends on the
        # exact page layout; confirm it still matches the live markup.
        pub_time = response.xpath(
            '/html/body/div[1]/div[2]/div[1]/div[1]/div/div/span[2]/text()').get()
        url = response.url

        # The link rule accepts anything after the id (trailing slash,
        # sub-path, fragment), so taking the last path segment is not
        # reliable; pull the id out of the URL directly instead.
        id_match = self._ARTICLE_ID_RE.search(url)
        if id_match:
            url_id = id_match.group(1)
        else:
            # No ``/p/<id>`` segment: fall back to the last path segment of
            # the URL with its query string removed.
            url_id = url.split('?')[0].split('/')[-1]

        # NOTE(review): the article body is not collected here; extracting
        # it would need a matching field on ScrapyDemoItem.
        item = ScrapyDemoItem(
            author=author, pub_time=pub_time, url=url, url_id=url_id)
        return item
